In [1]:
from __future__ import print_function
import numpy as np
import mxnet as mx
from mxnet import gluon
from tqdm import tqdm
In [2]:
# Device contexts: both the data and the model live on the first CPU device.
# They are kept as two separate names so either can be changed independently.
data_ctx = mx.cpu(0)
model_ctx = mx.cpu(0)
In [3]:
# Problem dimensions.
num_inputs = 28 * 28     # flattened feature count per example (784)
num_outputs = 10         # number of output units of the model
num_examples = 60000     # divisor used to turn the summed epoch loss into a mean

# Mini-batch size handed to the data loaders.
batch_size = 64
In [4]:
def transform(data, label):
    """Convert one (data, label) pair to float32 and rescale the data.

    Args:
        data: array-like exposing ``astype``; cast to float32 and divided by 255.
        label: array-like exposing ``astype``; cast to float32.

    Returns:
        Tuple ``(scaled_data, float_label)``.
    """
    scaled = data.astype(np.float32) / 255
    float_label = label.astype(np.float32)
    return scaled, float_label
In [5]:
# Build the two MNIST splits first, then wrap each one in a batching loader.
# `transform` is applied by the dataset to every (data, label) pair.
mnist_train = gluon.data.vision.MNIST(train=True, transform=transform)
mnist_test = gluon.data.vision.MNIST(train=False, transform=transform)

# Training batches are served in shuffled order; test batches keep dataset order.
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)
test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)
In [6]:
# The whole model is a single fully connected layer with `num_outputs` units.
# No input size is passed here, so it is left for Gluon to determine.
net = gluon.nn.Dense(units=num_outputs)
In [7]:
# Register a Normal(sigma=1) initializer for every parameter of the network
# and place the parameters on the model context.
net_params = net.collect_params()
net_params.initialize(mx.init.Normal(sigma=1.), ctx=model_ctx)
In [8]:
# Loss function: softmax followed by cross-entropy. The two keyword arguments
# spell out the library defaults: labels are class indices (not one-hot
# vectors) and the network output is raw scores (not log-probabilities).
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss(
    sparse_label=True,
    from_logits=False,
)
In [9]:
# Plain SGD over all parameters of `net`, with a fixed learning rate of 0.1.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
In [10]:
def evaluate_accuracy(data_iterator, net):
    """Compute the classification accuracy of ``net`` over ``data_iterator``.

    Args:
        data_iterator: iterable yielding ``(data, label)`` batches.
        net: callable model; its output is arg-maxed along axis 1 to obtain
            the predicted class of each example.

    Returns:
        The accuracy value reported by ``mx.metric.Accuracy`` after all
        batches have been accumulated.
    """
    acc = mx.metric.Accuracy()
    for data, label in data_iterator:
        # Flatten every example to a vector of `num_inputs` features; the
        # module-level constant replaces the previously duplicated literal 784.
        data = data.as_in_context(model_ctx).reshape((-1, num_inputs))
        label = label.as_in_context(model_ctx)
        output = net(data)
        predictions = mx.nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    # acc.get() returns a (name, value) pair; only the value is of interest.
    return acc.get()[1]
In [11]:
# Accuracy of the freshly initialised (still untrained) model on the test set;
# left as the cell's last expression so the notebook displays it.
evaluate_accuracy(data_iterator=test_data, net=net)
Out[11]:
In [12]:
# Hyperparameters
epochs = 10          # number of full passes over the training data
moving_loss = 0.0    # NOTE(review): not read by any cell visible here
In [13]:
# Training loop: one SGD pass over `train_data` per epoch, followed by an
# accuracy report on both the test and the training split.
for e in tqdm(range(epochs)):
    # Sum of per-example losses over this epoch; averaged when printed.
    cumulative_loss = 0
    for data, label in train_data:
        # Flatten every example to a vector of `num_inputs` features.
        data = data.as_in_context(model_ctx).reshape((-1, num_inputs))
        label = label.as_in_context(model_ctx)
        # Record the forward pass so gradients can be computed afterwards.
        with mx.autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Normalise the gradient by the size of *this* batch. The final batch
        # is smaller than `batch_size` whenever the dataset size is not a
        # multiple of it, and passing the nominal `batch_size` would shrink
        # that update.
        trainer.step(data.shape[0])
        cumulative_loss += mx.nd.sum(loss).asscalar()

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc: %s, Test_acc: %s" % (
        e,
        cumulative_loss / num_examples,
        train_accuracy,
        test_accuracy))
In [14]:
import matplotlib.pyplot as plt
In [15]:
# Prediction function
def model_predict(net, data):
    """Return, for each row of ``data``, the index of the highest output of ``net``.

    Args:
        net: callable model applied to the batch.
        data: input batch; moved to ``model_ctx`` before the forward pass.

    Returns:
        The arg-max of the network output along axis 1.
    """
    batch = data.as_in_context(model_ctx)
    scores = net(batch)
    return mx.nd.argmax(scores, axis=1)
In [16]:
# Let's sample 10 random data points from the test set: a shuffled loader
# over the test split that serves batches of 10 examples.
mnist_sample_source = mx.gluon.data.vision.MNIST(train=False, transform=transform)
sample_data = mx.gluon.data.DataLoader(dataset=mnist_sample_source,
                                       batch_size=10,
                                       shuffle=True)
In [17]:
# Display one batch of sample images side by side and print the model's
# predictions for them. Only the first batch is used (see the `break`).
for data, _ in sample_data:
    data = data.as_in_context(model_ctx)
    print(data.shape)

    # Derive the tiling geometry from the batch itself instead of hard-coding
    # 10 images of 28 pixels, so this cell keeps working if the loader's
    # batch size changes.
    # NOTE(review): assumes batches are laid out (N, H, W, 1), as the original
    # 28 / 10*28 constants imply — confirm against the dataset.
    num_images, height, width = data.shape[0], data.shape[1], data.shape[2]

    # (N, H, W, 1) -> (H, N, W, 1) -> (H, N*W, 1): the images end up in a
    # single row, left to right.
    im = mx.nd.transpose(data, (1, 0, 2, 3))
    im = mx.nd.reshape(im, (height, num_images * width, 1))
    # Repeat the single channel three times so imshow receives an RGB image.
    imtiles = mx.nd.tile(im, (1, 1, 3))

    plt.imshow(imtiles.asnumpy())
    plt.show()

    # Flatten each image to `num_inputs` features, as done during training.
    pred = model_predict(net, data.reshape((-1, num_inputs)))
    print('model predictions are:', pred)
    break